Completed
Push — master ( 15786e...f61697 )
by Elbert
01:01
created

wappalyzer.js ➔ ... ➔ ???   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
c 2
b 0
f 0
nc 1
nop 1
dl 0
loc 26
rs 8.8571
1
/**
2
 * Wappalyzer v5
3
 *
4
 * Created by Elbert Alias <[email protected]>
5
 *
6
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
7
 */
8
9
'use strict';

/**
 * Patterns applied to the hostname and URL before a detection is recorded
 * in the hostname cache (see trackDetectedApps).
 */
const validation = {
  // A dotted name whose last label is 2-6 lowercase letters, optionally
  // preceded by a 2-3 letter second-level label
  hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,

  // Local, development, staging, test, demo, admin and similar locations
  hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/
};

/**
 * Core object. `apps`, `categories` and `driver` start out empty and are
 * expected to be filled in from outside this file.
 */
var wappalyzer = {
  apps: {},
  categories: {},
  driver: {},
  config: {
    websiteURL: 'https://wappalyzer.com/',
    twitterURL: 'https://twitter.com/Wappalyzer',
    githubURL: 'https://github.com/AliasIO/Wappalyzer',
  }
};

// Detected applications, keyed by URL and then by application name
var detected = {};

// Per-hostname data waiting to be handed to the driver by wappalyzer.ping
var hostnameCache = {};

// Ads waiting to be handed to the driver by wappalyzer.ping
var adCache = [];
31
32
/**
 * Forward a log message to the driver's logger.
 *
 * @param {string} message  text to log
 * @param {string} [source] origin of the message; '' is used when falsy
 * @param {string} [type]   log level; 'debug' is used when falsy
 */
wappalyzer.log = (message, source, type) => {
  const origin = source ? source : '';
  const level  = type ? type : 'debug';

  wappalyzer.driver.log(message, origin, level);
};
38
39
/**
 * Run every known application's patterns against one page, resolve excludes
 * and implies, cache and track the result, and hand it to the driver.
 *
 * @param {string} hostname passed through to trackDetectedApps
 * @param {string} url      page URL; everything from the first '#' is dropped
 * @param {object} data     page data; `html`, `headers`, `env` and `robotsTxt`
 *                          are analyzed when truthy. `data.url` is overwritten
 *                          and a non-string `data.html` is replaced with ''.
 * @param {*}      context  passed through to wappalyzer.driver.displayApps
 */
wappalyzer.analyze = (hostname, url, data, context) => {
  const apps = {};

  // Remove hash from URL
  url = url.split('#')[0];
  data.url = url;

  if ( typeof data.html !== 'string' ) {
    data.html = '';
  }

  if ( detected[url] === undefined ) {
    detected[url] = {};
  }

  for ( const appName of Object.keys(wappalyzer.apps) ) {
    // Reuse the instance cached for this URL, if any
    const cached = detected[url] && detected[url][appName];
    const app    = cached ? cached : new Application(appName, wappalyzer.apps[appName]);

    apps[appName] = app;

    if ( url ) {
      analyzeUrl(app, url);
    }

    if ( data.html ) {
      analyzeHtml(app, data.html);
      analyzeScript(app, data.html);
      analyzeMeta(app, data.html);
    }

    if ( data.headers ) {
      analyzeHeaders(app, data.headers);
    }

    if ( data.env ) {
      analyzeEnv(app, data.env);
    }

    if ( data.robotsTxt ) {
      analyzeRobotsTxt(app, data.robotsTxt);
    }
  }

  // Drop applications that were not detected or have no confidence
  for ( const appName of Object.keys(apps) ) {
    const app = apps[appName];

    if ( !( app.detected && app.getConfidence() ) ) {
      delete apps[app.name];
    }
  }

  resolveExcludes(apps);
  resolveImplies(apps, url);

  cacheDetectedApps(apps, url);
  trackDetectedApps(apps, url, hostname, data.html);

  const names = Object.keys(apps);

  if ( names.length ) {
    wappalyzer.log(names.length + ' apps detected: ' + names.join(', ') + ' on ' + url, 'core');
  }

  wappalyzer.driver.displayApps(detected[url], context);
};
101
102
/**
 * Queue a detected ad; the queue is handed to the driver by wappalyzer.ping.
 *
 * @param {*} ad the ad to append to the ad cache
 */
wappalyzer.cacheDetectedAds = ad => {
  adCache[adCache.length] = ad;
};
108
109
/**
 * Check whether the site's robots.txt rules permit the path of a URL.
 *
 * The disallow rules are obtained from wappalyzer.driver.getRobotsTxt and a
 * rule applies when the URL's pathname starts with it.
 *
 * @param {string} url
 * @returns {Promise} resolves (without a value) when no rule applies; rejects
 *   without a reason when a rule applies, or with the underlying error when
 *   the rules could not be retrieved or processed
 */
wappalyzer.robotsTxtAllows = url => {
  return new Promise((resolve, reject) => {
    var parsed = wappalyzer.parseUrl(url);

    wappalyzer.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:')
      .then(robotsTxt => {
        var disallowed = robotsTxt.some(disallow => parsed.pathname.indexOf(disallow) === 0);

        if ( disallowed ) {
          reject();

          return;
        }

        resolve();
      })
      // Without this a failed lookup would leave the returned promise pending forever
      .catch(reject);
  });
};
128
129
/**
 * Parse a URL by assigning it to the `href` of an anchor element created
 * through the driver's document.
 *
 * @param {string} url
 * @returns {object} the anchor element, with an extra `canonical` property
 *   made of its protocol, host and pathname
 */
wappalyzer.parseUrl = url => {
  const anchor = wappalyzer.driver.document.createElement('a');

  anchor.href = url;
  anchor.canonical = `${anchor.protocol}//${anchor.host}${anchor.pathname}`;

  return anchor;
};
141
142
/**
 * Extract the Disallow rules that apply to this tool from a robots.txt body.
 *
 * Only rules that follow a `User-agent` line naming `*` or `wappalyzer`
 * (case-insensitive) are collected.
 *
 * @param {string} robotsTxt raw robots.txt contents
 * @returns {string[]} the disallowed path prefixes, in file order
 */
wappalyzer.parseRobotsTxt = robotsTxt => {
  var userAgent;
  var disallow = [];

  // Accept LF, CRLF and CR line endings: `.` does not match '\r', so a
  // leftover carriage return would stop every rule below from matching.
  robotsTxt.split(/\r\n|\r|\n/).forEach(rawLine => {
    var line    = rawLine.trim();
    var matches = /^User-agent:\s*(.+)$/i.exec(line);

    if ( matches ) {
      userAgent = matches[1].toLowerCase();

      return;
    }

    if ( userAgent === '*' || userAgent === 'wappalyzer' ) {
      matches = /^Disallow:\s*(.+)$/i.exec(line);

      if ( matches ) {
        disallow.push(matches[1]);
      }
    }
  });

  return disallow;
};
167
168
/**
 * Hand the cached hostnames and ads to the driver once either cache holds
 * at least 50 entries, then start both caches afresh.
 */
wappalyzer.ping = () => {
  if ( Object.keys(hostnameCache).length >= 50 || adCache.length >= 50 ) {
    wappalyzer.driver.ping(hostnameCache, adCache);

    // New containers rather than clearing in place, so the objects just
    // given to the driver are left intact
    hostnameCache = {};
    adCache = [];
  }
};
179
180
/**
 * Wrap a string in a single-element array; any other value is returned
 * unchanged.
 *
 * @param {*} value
 * @returns {*} `[ value ]` for strings, otherwise `value` itself
 */
function asArray(value) {
  if ( typeof value === 'string' ) {
    return [ value ];
  }

  return value;
}
186
187
/**
 * Parse apps.json patterns.
 *
 * A pattern is a string of the form `regex\;key:value\;key:value`. The first
 * segment is compiled as a case-insensitive regular expression; every further
 * segment containing a ':' becomes a string property on the result.
 *
 * @param {string|string[]|Object<string, string|string[]>} patterns
 * @returns {object[]|Object<string, object[]>} for a string or array: a list
 *   of `{ string, regex, ...attributes }`; for an object: the same lists keyed
 *   like the input (an object with a `main` key is returned as that key's
 *   list); for undefined: an empty object
 */
function parsePatterns(patterns) {
  var parsed = {};

  // Convert string to object containing array containing string
  if ( typeof patterns === 'string' || patterns instanceof Array ) {
    patterns = {
      main: asArray(patterns)
    };
  }

  for ( var key in patterns ) {
    parsed[key] = [];

    asArray(patterns[key]).forEach(pattern => {
      var attrs = {};

      pattern.split('\\;').forEach((attr, i) => {
        if ( i ) {
          // Key value pairs; the value may itself contain ':'
          attr = attr.split(':');

          if ( attr.length > 1 ) {
            attrs[attr.shift()] = attr.join(':');
          }
        } else {
          attrs.string = attr;

          try {
            // The RegExp constructor takes the source as is; '/' needs no escaping
            attrs.regex = new RegExp(attr, 'i');
          } catch (e) {
            // NOTE(review): an empty RegExp matches every input, so an invalid
            // pattern is treated as a match by the analyze* functions - confirm
            // that this is intended
            attrs.regex = new RegExp();

            wappalyzer.log(e + ': ' + attr, 'core', 'error');
          }
        }
      });

      parsed[key].push(attrs);
    });
  }

  // Convert back to array if the original pattern list was an array (or string)
  if ( 'main' in parsed ) {
    parsed = parsed.main;
  }

  return parsed;
}
238
239
/**
 * Remove from `apps` every application that is named in the `excludes`
 * property of any application in `apps`. All exclusions are collected before
 * anything is removed.
 *
 * @param {Object<string, object>} apps applications keyed by name; modified in place
 */
function resolveExcludes(apps) {
  const excluded = [];

  // Exclude app in detected apps only
  for ( const name of Object.keys(apps) ) {
    const list = apps[name].props.excludes;

    if ( list ) {
      for ( const entry of asArray(list) ) {
        excluded.push(entry);
      }
    }
  }

  // Remove excluded applications
  for ( const name of Object.keys(apps) ) {
    if ( excluded.indexOf(name) >= 0 ) {
      delete apps[name];
    }
  }
}
260
261
/**
 * Add the applications implied by the ones in `apps` and pass confidence on
 * to them.
 *
 * Each `implies` entry is parsed like a pattern, so it may carry a
 * `confidence` attribute that scales the confidence handed to the implied
 * application. Entries naming an application missing from `wappalyzer.apps`
 * are logged and skipped.
 *
 * @param {Object<string, Application>} apps applications keyed by name; modified in place
 * @param {string} url used to reuse instances already cached in `detected`
 */
function resolveImplies(apps, url) {
  var checkImplies = true;

  // Implied applications
  // Run several passes as implied apps may imply other apps
  while ( checkImplies ) {
    checkImplies = false;

    Object.keys(apps).forEach(appName => {
      var app = apps[appName];

      // The implies list lives in the app's definition (`props`)
      if ( !app || !app.props || !app.props.implies ) {
        return;
      }

      asArray(app.props.implies).forEach(implied => {
        implied = parsePatterns(implied)[0];

        if ( !wappalyzer.apps[implied.string] ) {
          wappalyzer.log('Implied application ' + implied.string + ' does not exist', 'core', 'warn');

          return;
        }

        if ( !( implied.string in apps ) ) {
          // Give the new instance its real definition and mark it detected, so
          // that its own implies are followed on the next pass
          apps[implied.string] = detected[url] && detected[url][implied.string]
            ? detected[url][implied.string]
            : new Application(implied.string, wappalyzer.apps[implied.string], true);

          checkImplies = true;
        }

        // Apply app confidence to implied app
        Object.keys(app.confidence).forEach(id => {
          apps[implied.string].confidence[id + ' implied by ' + appName] = app.confidence[id] * ( implied.confidence ? implied.confidence / 100 : 1 );
        });
      });
    });
  }
}
297
298
/**
 * Cache detected applications per URL, then give wappalyzer.ping a chance to
 * flush the caches.
 *
 * Does nothing when the driver provides no `ping` function.
 *
 * @param {Object<string, Application>} apps applications keyed by name
 * @param {string} url key under which the applications are stored in `detected`
 */
function cacheDetectedApps(apps, url) {
  if ( !( wappalyzer.driver.ping instanceof Function ) ) {
    return;
  }

  if ( detected[url] === undefined ) {
    detected[url] = {};
  }

  Object.keys(apps).forEach(appName => {
    // Per URL. The instance itself is stored, so its confidence values are
    // carried along without copying.
    detected[url][appName] = apps[appName];
  });

  wappalyzer.ping();
}
317
318
/**
 * Track detected applications per hostname.
 *
 * An application is counted when its cached instance reports a confidence of
 * at least 100, the hostname passes `validation.hostname`, the URL does not
 * match `validation.hostnameBlacklist` and robots.txt allows the URL. The
 * robots.txt check is asynchronous, so the counts are updated after this
 * function has returned.
 *
 * Does nothing when the driver provides no `ping` function.
 *
 * @param {Object<string, Application>} apps applications keyed by name
 * @param {string} url      page URL, also the key into `detected`
 * @param {string} hostname key into the hostname cache
 * @param {string} html     page source, searched for the `lang` attribute of the html tag
 */
function trackDetectedApps(apps, url, hostname, html) {
  if ( !( wappalyzer.driver.ping instanceof Function ) ) {
    return;
  }

  Object.keys(apps).forEach(appName => {
    var app = apps[appName];

    if ( detected[url][appName].getConfidence() >= 100 ) {
      if ( validation.hostname.test(hostname) && !validation.hostnameBlacklist.test(url) ) {
        wappalyzer.robotsTxtAllows(url)
          .then(() => {
            if ( !( hostname in hostnameCache ) ) {
              hostnameCache[hostname] = {
                applications: {},
                meta: {}
              };
            }

            if ( !( appName in hostnameCache[hostname].applications ) ) {
              hostnameCache[hostname].applications[appName] = {
                hits: 0
              };
            }

            hostnameCache[hostname].applications[appName].hits ++;

            if ( app.version ) {
              hostnameCache[hostname].applications[appName].version = app.version;
            }
          })
          .catch(() => wappalyzer.log('Disallowed in robots.txt: ' + url, 'core'));
      }
    }
  });

  // Additional information
  // Runs before the asynchronous callbacks above, so the language is only
  // stored for hostnames that were already in the cache
  if ( hostname in hostnameCache ) {
    var match = html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);

    if ( match && match.length ) {
      hostnameCache[hostname].meta['language'] = match[1];
    }
  }

  wappalyzer.ping();
}
366
367
/**
 * Test the application's `url` patterns against the page URL and record
 * every pattern that matches.
 *
 * @param {Application} app
 * @param {string} url
 */
function analyzeUrl(app, url) {
  const patterns = parsePatterns(app.props.url);

  if ( !patterns.length ) {
    return;
  }

  for ( const pattern of patterns ) {
    if ( pattern.regex.test(url) ) {
      addDetected(app, pattern, 'url', url);
    }
  }
}
381
382
/**
 * Test the application's `html` patterns against the page source and record
 * every pattern that matches.
 *
 * @param {Application} app
 * @param {string} html
 */
function analyzeHtml(app, html) {
  const patterns = parsePatterns(app.props.html);

  if ( !patterns.length ) {
    return;
  }

  for ( const pattern of patterns ) {
    if ( pattern.regex.test(html) ) {
      addDetected(app, pattern, 'html', html);
    }
  }
}
396
397
/**
 * Test the application's `script` patterns against the `src` of every script
 * tag found in the page source and record each match.
 *
 * @param {Application} app
 * @param {string} html
 */
function analyzeScript(app, html) {
  const scriptTag = new RegExp('<script[^>]+src=("|\')([^"\']+)', 'ig');
  const patterns  = parsePatterns(app.props.script);

  if ( !patterns.length ) {
    return;
  }

  for ( const pattern of patterns ) {
    // exec() returning null resets lastIndex, so every pattern scans the
    // source from the start
    let found = scriptTag.exec(html);

    while ( found ) {
      const src = found[2];

      if ( pattern.regex.test(src) ) {
        addDetected(app, pattern, 'script', src);
      }

      found = scriptTag.exec(html);
    }
  }
}
416
417
/**
 * Test the application's `meta` patterns against the meta tags found in the
 * page source. For each tag whose `name` or `property` equals a pattern key,
 * the tag's `content` value is tested and every match is recorded.
 *
 * @param {Application} app
 * @param {string} html
 */
function analyzeMeta(app, html) {
  const metaTag  = /<meta[^>]+>/ig;
  const patterns = parsePatterns(app.props.meta);

  if ( !patterns ) {
    return;
  }

  let found;

  while ( ( found = metaTag.exec(html) ) ) {
    const tag = found.toString();

    for ( const meta in patterns ) {
      const named = new RegExp('(name|property)=["\']' + meta + '["\']', 'i');

      if ( !named.test(tag) ) {
        continue;
      }

      const content = tag.match(/content=("|')([^"']+)("|')/i);

      if ( !content || content.length !== 4 ) {
        continue;
      }

      for ( const pattern of patterns[meta] ) {
        if ( pattern.regex.test(content[2]) ) {
          addDetected(app, pattern, 'meta', content[2], meta);
        }
      }
    }
  }
}
440
441
/**
 * Test the application's `headers` patterns against the response headers and
 * record every match. Pattern keys are lower-cased before they are looked up
 * in `headers`.
 *
 * @param {Application} app
 * @param {Object<string, string>} headers response headers; nothing is tested when falsy
 */
function analyzeHeaders(app, headers) {
  const patterns = parsePatterns(app.props.headers);

  if ( !headers ) {
    return;
  }

  for ( const name of Object.keys(patterns) ) {
    const header = name.toLowerCase();

    for ( const pattern of patterns[name] ) {
      if ( header in headers && pattern.regex.test(headers[header]) ) {
        addDetected(app, pattern, 'headers', headers[header], header);
      }
    }
  }
}
459
460
/**
 * Test the application's `env` patterns against every value in `envs` and
 * record each match.
 *
 * @param {Application} app
 * @param {object} envs values to test; its own enumerable keys are visited in order
 */
function analyzeEnv(app, envs) {
  const patterns = parsePatterns(app.props.env);

  if ( !patterns.length ) {
    return;
  }

  for ( const pattern of patterns ) {
    for ( const key of Object.keys(envs) ) {
      if ( pattern.regex.test(envs[key]) ) {
        addDetected(app, pattern, 'env', envs[key]);
      }
    }
  }
}
476
477
/**
 * Test the application's `robotsTxt` patterns against the given robots.txt
 * data and record every pattern that matches.
 *
 * @param {Application} app
 * @param {*} robotsTxt value handed to each pattern's regular expression
 */
function analyzeRobotsTxt(app, robotsTxt) {
  const patterns = parsePatterns(app.props.robotsTxt);

  if ( !patterns.length ) {
    return;
  }

  for ( const pattern of patterns ) {
    if ( pattern.regex.test(robotsTxt) ) {
      addDetected(app, pattern, 'robotsTxt', robotsTxt);
    }
  }
}
491
492
/**
 * Mark application as detected, set confidence and version.
 *
 * The confidence is stored under a key built from the type, the optional key
 * and the pattern's regular expression. When the pattern has a `version`
 * template, back references (`\1`) and ternaries (`\1?a:b`) in it are filled
 * in from the match of the pattern against `value`.
 *
 * @param {Application} app     application to update
 * @param {object}      pattern parsed pattern (`regex`, optional `confidence` and `version`)
 * @param {string}      type    what was matched, e.g. 'url' or 'headers'
 * @param {string}      value   the text the pattern matched against
 * @param {string}      [key]   name of the header or meta tag, when applicable
 */
function addDetected(app, pattern, type, value, key) {
  app.detected = true;

  // Set confidence level
  app.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = pattern.confidence || 100;

  // Detect version number
  if ( !pattern.version ) {
    return;
  }

  var matches = pattern.regex.exec(value);

  if ( !matches ) {
    return;
  }

  var version = pattern.version;

  matches.forEach((match, i) => {
    // Parse ternary operator
    var ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

    // Function replacers insert the text literally; a plain string would have
    // sequences such as '$&' in the matched text expanded
    if ( ternary && ternary.length === 3 ) {
      version = version.replace(ternary[0], () => match ? ternary[1] : ternary[2]);
    }

    // Replace back references
    version = version.replace(new RegExp('\\\\' + i, 'g'), () => match || '');
  });

  // Use the longest detected version number: an earlier, longer version is
  // kept; on equal length the newer one wins
  var current = app.version || '';

  if ( version && version.length >= current.length ) {
    app.version = version;
  }
}
531
532
/**
 * Application class
 *
 * Holds the detection state of one application: the confidence contributed
 * by each matching pattern, the detected version and the application's
 * definition (`props`).
 */
class Application {
  /**
   * @param {string}  name       application name
   * @param {object}  props      the application's definition
   * @param {boolean} [detected] initial detected state, coerced to a boolean
   */
  constructor(name, props, detected) {
    this.confidence      = {};
    this.confidenceTotal = 0;
    this.detected        = Boolean(detected);
    this.excludes        = [];
    this.name            = name;
    this.props           = props;
    this.version         = '';
  }

  /**
   * Calculate confidence total
   *
   * Sums all confidence entries, caps the sum at 100 and stores it in
   * `confidenceTotal`.
   *
   * @returns {number} the capped total
   */
  getConfidence() {
    var total = 0;

    for ( var id in this.confidence ) {
      // Parsed patterns carry their confidence as a string; convert before
      // adding so that '50' and '25' sum to 75 instead of concatenating
      var value = Number(this.confidence[id]);

      if ( !isNaN(value) ) {
        total += value;
      }
    }

    return this.confidenceTotal = Math.min(total, 100);
  }
}
559
560
// Export the core object when a `module` object is available; elsewhere the
// `typeof` check keeps the missing name from throwing
if ( typeof module === 'object' ) { module.exports = wappalyzer; }
563